library(nycflights13)
library(tidyverse)
-- Attaching packages --------------------------------------- tidyverse 1.2.1 --
v ggplot2 3.0.0     v purrr   0.2.5
v tibble  1.4.2     v dplyr   0.7.6
v tidyr   0.8.1     v stringr 1.3.1
v readr   1.1.1     v forcats 0.3.0
-- Conflicts ------------------------------------------ tidyverse_conflicts() --
x dplyr::filter() masks stats::filter()
x dplyr::lag()    masks stats::lag()

nycflights13

# Display the flights data set (presumably the one provided by the
# nycflights13 package attached at the top of the file).
flights

Filter Rows

# Keep only the flights whose month and day are both 1 (January 1st),
# store them in jan1, then show the result.
jan1 <- flights %>%
  filter(month == 1, day == 1)
jan1

Comparisons

# Exact equality on floating-point results: both comparisons come back
# FALSE even though they are mathematically true.
sqrt(2) ^ 2 == 2
[1] FALSE
1 / 49 * 49 == 1
[1] FALSE
# near() compares with a numeric tolerance instead of exact equality,
# so the same two checks return TRUE.
near(sqrt(2) ^ 2, 2)
[1] TRUE
near(1 / 49 * 49, 1)
[1] TRUE

Logical operators

# Flights whose month is 11 or 12, spelled out with `|`.
nov_dec <- flights %>%
  filter(month == 11 | month == 12)
nov_dec
# The same selection using the %in% shorthand.
nov_dec <- flights %>%
  filter(month %in% c(11, 12))
nov_dec
# De Morgan's law: !(a | b) is equivalent to !a & !b, so "neither delay
# exceeds 120" is written as two separate upper bounds.
delay_range <- flights %>%
  filter(arr_delay <= 120, dep_delay <= 120)
delay_range

Missing Values

# Comparisons that involve NA evaluate to NA rather than TRUE or FALSE.
NA > 5
[1] NA
10 == NA
[1] NA
# Even NA == NA is NA, so `==` cannot be used to detect a missing value.
NA == NA
[1] NA
# is.na() is the test that actually reports whether a value is missing.
x <- NA
is.na(x)
[1] TRUE
# Small tibble with one missing value to show how filter() treats NA.
df <- tibble(x = c(1, NA, 3))
# Condition on x alone.
df %>% filter(x > 1)
# Same condition, but rows where x is missing are requested explicitly.
df %>% filter(x > 1 | is.na(x))

Exercises

# 1
# Arrival delay of at least 120
flights %>% filter(arr_delay >= 120)
# Destination is IAH or HOU
flights %>% filter(dest %in% c("IAH", "HOU"))
# Carrier is UA, AA or DL
flights %>% filter(carrier %in% c("UA", "AA", "DL"))
# Month is 7, 8 or 9
flights %>% filter(month %in% 7:9)
# Departure delay of at most 0, yet arrival delay above 120
flights %>% filter(dep_delay <= 0 & arr_delay > 120)
# Departure delay of at least 60, with more than 30 made up by arrival
flights %>% filter(dep_delay >= 60 & dep_delay - arr_delay > 30)
# Departure time of exactly 2400, or anywhere from 0 through 600
flights %>% filter(dep_time == 2400 | between(dep_time, 0, 600))
# Departure time is missing
flights %>% filter(is.na(dep_time))

Arrange Rows

# Order by year, then month, then day; later columns break ties.
flights %>% arrange(year, month, day)
# Largest departure delay first.
flights %>% arrange(desc(dep_delay))
# Smallest departure delay first.
flights %>% arrange(dep_delay)
# Missing values are placed last in both ascending and descending order.
df <- tibble(x = c(5, 2, NA))
df %>% arrange(x)
df %>% arrange(desc(x))

Exercises

# 1
# Sort on "dep_time is present" first: FALSE orders before TRUE, so rows
# with a missing dep_time lead, followed by the rest in dep_time order.
flights %>% arrange(!is.na(dep_time), dep_time)
# 2
# Largest departure delay first
flights %>% arrange(desc(dep_delay))
# Smallest departure delay first
flights %>% arrange(dep_delay)
# 3
# Fastest by total air time: smallest air_time first.
arrange(flights, air_time)
# Fastest by average speed: distance / air_time is a speed (the * 60
# presumably converts a per-minute rate to per-hour -- confirm units).
# Sorted descending so the highest speeds come first; ascending order
# would list the slowest flights instead.
arrange(flights, desc(distance / air_time * 60))
# 4
# Interpretation 1: rank by distance, largest first and then smallest first
flights %>% arrange(desc(distance))
flights %>% arrange(distance)
# Interpretation 2: rank by air_time, largest first and then smallest first
flights %>% arrange(desc(air_time))
flights %>% arrange(air_time)

Select Columns

# Pick columns by naming each one.
flights %>% select(year, month, day)
# Pick every column from year through day.
flights %>% select(year:day)
# Drop every column from year through day.
flights %>% select(-(year:day))
# Rename tailnum to tail_num, store the result, then show it.
renamed <- flights %>%
  rename(tail_num = tailnum)
renamed
LS0tDQp0aXRsZTogIkNoYXB0ZXIgNSINCm91dHB1dDogaHRtbF9ub3RlYm9vaw0KLS0tDQoNCg0KYGBge3J9DQpsaWJyYXJ5KG55Y2ZsaWdodHMxMykNCmxpYnJhcnkodGlkeXZlcnNlKQ0KYGBgDQoNCiMgbnljZmxpZ2h0czEzDQpgYGB7cn0NCmZsaWdodHMNCmBgYA0KDQojIEZpbHRlciBSb3dzDQpgYGB7cn0NCiMgZmlsdGVyIGJ5IGZsaWdodHMgb24gdGhlIDFzdCBvZiBqYW51YXJ5DQooamFuMSA8LSBmaWx0ZXIoZmxpZ2h0cywgbW9udGggPT0gMSwgZGF5ID09IDEpKQ0KYGBgDQoNCiMgQ29tcGFyaXNvbnMNCmBgYHtyfQ0Kc3FydCgyKSBeIDIgPT0gMg0KMSAvIDQ5ICogNDkgPT0gMQ0KbmVhcihzcXJ0KDIpIF4gMiwgMikNCm5lYXIoMSAvIDQ5ICogNDksIDEpDQoNCmBgYA0KDQojIExvZ2ljYWwgb3BlcmF0b3JzDQpgYGB7cn0NCihub3ZfZGVjIDwtIGZpbHRlcihmbGlnaHRzLCBtb250aCA9PSAxMSB8IG1vbnRoID09IDEyKSkNCg0KIyB1c2luZyBzaG9ydGhhbmQNCihub3ZfZGVjIDwtIGZpbHRlcihmbGlnaHRzLCBtb250aCAlaW4lIGMoMTEsIDEyKSkpDQojIHVzaW5nIGRlbW9yZ2FuJ3MNCihkZWxheV9yYW5nZTEgPC0gZmlsdGVyKGZsaWdodHMsICEoYXJyX2RlbGF5ID4gMTIwIHwgZGVwX2RlbGF5ID4gMTIwKSkpDQpgYGANCg0KIyBNaXNzaW5nIFZhbHVlcw0KYGBge3J9DQpOQSA+IDUNCjEwID09IE5BDQpOQSA9PSBOQQ0KeCA8LSBOQQ0KaXMubmEoeCkNCmRmIDwtIHRpYmJsZSh4ID0gYygxLCBOQSwgMykpDQpmaWx0ZXIoZGYsIHggPiAxKQ0KZmlsdGVyKGRmLCBpcy5uYSh4KSB8IHggPiAxKQ0KYGBgDQoNCiMgRXhlcmNpc2VzDQpgYGB7cn0NCiMgMQ0KZmlsdGVyKGZsaWdodHMsIGFycl9kZWxheSA+PSAxMjApDQpmaWx0ZXIoZmxpZ2h0cywgZGVzdCA9PSAiSUFIIiB8IGRlc3QgPT0gIkhPVSIpDQpmaWx0ZXIoZmxpZ2h0cywgY2FycmllciA9PSAiVUEiIHwgY2FycmllciA9PSAiQUEiIHwgY2FycmllciA9PSAiREwiKQ0KZmlsdGVyKGZsaWdodHMsIG1vbnRoICVpbiUgYyg3LCA4LCA5KSkNCmZpbHRlcihmbGlnaHRzLCBkZXBfZGVsYXkgPD0gMCwgYXJyX2RlbGF5ID4gMTIwKQ0KZmlsdGVyKGZsaWdodHMsIGRlcF9kZWxheSA+PSA2MCwgZGVwX2RlbGF5IC0gYXJyX2RlbGF5ID4gMzApDQpmaWx0ZXIoZmxpZ2h0cywgYmV0d2VlbihkZXBfdGltZSwgMCwgNjAwKSB8IGRlcF90aW1lID09IDI0MDApDQpmaWx0ZXIoZmxpZ2h0cywgaXMubmEoZGVwX3RpbWUpKQ0KYGBgDQoNCiMgQXJyYW5nZSBSb3dzDQpgYGB7cn0NCmFycmFuZ2UoZmxpZ2h0cywgeWVhciwgbW9udGgsIGRheSkNCmFycmFuZ2UoZmxpZ2h0cywgZGVzYyhkZXBfZGVsYXkpKQ0KYXJyYW5nZShmbGlnaHRzLCBkZXBfZGVsYXkpDQoNCiMgTWlzc2luZyB2YWx1ZXMgYXJlIGFsd2F5cyBzb3J0ZWQgYXQgdGhlIGVuZA0KZGYgPC0gdGliYmxlKHggPSBjKDUsIDIsIE5BKSkNCmFycmFuZ2UoZGYsIHgpDQphcnJhbmdlKGRmLCBkZXNjKHgpKQ0KYGBgDQoNCiMgRXhlcmNp
c2VzDQpgYGB7cn0NCiMgMQ0KYXJyYW5nZShmbGlnaHRzLCBkZXNjKGlzLm5hKGRlcF90aW1lKSksIGRlcF90aW1lKQ0KIyAyDQojIE1vc3QgZGVsYXllZA0KYXJyYW5nZShmbGlnaHRzLCBkZXNjKGRlcF9kZWxheSkpDQojIEVhcmxpZXN0DQphcnJhbmdlKGZsaWdodHMsIGRlcF9kZWxheSkNCiMgMw0KIyB0b3RhbCBhaXIgdGltZQ0KYXJyYW5nZShmbGlnaHRzLCBhaXJfdGltZSkNCiMgYXZlcmFnZSBhaXIgdGltZQ0KYXJyYW5nZShmbGlnaHRzLCBkaXN0YW5jZSAvIGFpcl90aW1lICogNjApDQojIDQNCiMgaW50ZXJwcmV0YXRpb24gMQ0KYXJyYW5nZShmbGlnaHRzLCBkZXNjKGRpc3RhbmNlKSkNCmFycmFuZ2UoZmxpZ2h0cywgZGlzdGFuY2UpDQojIGludGVycHJldGF0aW9uIDINCmFycmFuZ2UoZmxpZ2h0cywgZGVzYyhhaXJfdGltZSkpDQphcnJhbmdlKGZsaWdodHMsIGFpcl90aW1lKQ0KYGBgDQoNCiMgU2VsZWN0IENvbHVtbnMNCmBgYHtyfQ0Kc2VsZWN0KGZsaWdodHMsIHllYXIsIG1vbnRoLCBkYXkpDQojIFNlbGVjdCByYW5nZSBvZiBjb2x1bW5zDQpzZWxlY3QoZmxpZ2h0cywgeWVhcjpkYXkpDQojIEV4Y2x1ZGUgY29sdW1ucw0Kc2VsZWN0KGZsaWdodHMsIC0oeWVhcjpkYXkpKQ0KKHJlbmFtZWQgPC0gcmVuYW1lKGZsaWdodHMsIHRhaWxfbnVtID0gdGFpbG51bSkpDQojIE1vdmUgc29tZSB2YXJpYWJsZXMgdG8gdGhlIHN0YXJ0IG9mIHRoZSBkYXRhZnJhbWUNCmBgYA0K